Find markers for less confidently annotated region (correlation-based & ICI annotation) using escoring

This is in Python

1. Import all needed functions

In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from escoring.enrichment_scoring import calculate_escores
from escoring.enrichment_scoring import permute, sig_interval
from escoring.support_funcs import load_sparse_h5, pairwise_similarities
from escoring.support_funcs import sig_dictionary

2. Load the data

Expression matrix

In [2]:
# This is the integrated (batch-corrected) expression matrix from the atlas.
# load_sparse_h5 is the escoring helper imported above; "counts" is presumably
# the name of the dataset inside the HDF5 file (TODO confirm against escoring docs).
HVG = load_sparse_h5("counts","./supp_data/hvg_integrated.h5")
In [3]:
# Check which container type the loader returned.
type(HVG)
Out[3]:
scipy.sparse.csr.csr_matrix
In [4]:
# Escoring expects a cell-by-gene matrix (rows = cells, columns = genes), so a
# gene-by-cell matrix has to be transposed first.
# NOTE: this cell is not idempotent -- running it a second time flips the
# matrix back to gene-by-cell. Re-run the loading cell before re-running it.
HVG = HVG.transpose()
In [5]:
# Expect (number of cells, number of genes) after the transpose.
HVG.shape
Out[5]:
(110427, 17513)
In [8]:
# Gene IDs, one per line of the file, in the same order as the columns of HVG.
fname = "./supp_data/hvg_ids.txt"
with open(fname, "r") as f:
    # The with-block closes the file on exit, so no explicit f.close() is needed.
    hvg_names = [gene.strip("\n") for gene in f]

# Fail early (with a clear message) if the IDs cannot label the gene axis,
# e.g. because the transpose cell was skipped or run twice.
assert len(hvg_names) == HVG.shape[1], (
    f"{len(hvg_names)} gene IDs read from {fname}, "
    f"but HVG has {HVG.shape[1]} columns"
)

Reference cells

In [17]:
# Randomly selected cells from the region that is not confidently annotated by
# the correlation-based and ICI methods. The file holds one integer per line;
# the values are used below as row indices into UMAP_50 / UMAP.
fname = "./supp_data/selected.cells.txt"
with open(fname, "r") as f:
    # int() ignores surrounding whitespace/newlines; blank lines (e.g. a trailing
    # empty line) are skipped instead of raising ValueError.
    # The with-block closes the file on exit, so no explicit f.close() is needed.
    r_cells = [int(cell) for cell in f if cell.strip()]

50 UMAP dimensions for similarity calculations

In [20]:
# Embedding used for the cell-to-cell similarities (passed to pairwise_similarities below).
# Presumably one row per cell, in the same order as the rows of HVG -- TODO confirm.
UMAP_50 = np.loadtxt("./supp_data/umap50.txt")  # or load your preferred representation

2D UMAP for visualization

In [21]:
# Embedding used only for plotting; columns 0 and 1 are used as x/y coordinates
# in the visualization section.
UMAP = np.loadtxt("./supp_data/umap.txt")

3. Enrichment scoring

In [22]:
# Similarity of every cell to each reference cell, computed on the 50-D UMAP
# with an RBF kernel.
metric = "rbf"
# gamma is only relevant for the laplacian, sigmoid and rbf kernels;
# replace it with the value you want.
gamma = 0.8
S = pairwise_similarities(
    UMAP_50,
    r_cells,
    metric=metric,
    metric_params={"gamma": gamma},  # drop this argument if the metric needs no parameters
)
In [23]:
# Expect (number of cells, number of reference cells).
S.shape
Out[23]:
(110427, 30)
In [24]:
# Calculate the enrichment scores of every gene for every reference cell,
# weighting cells by the similarity matrix S computed above.
# The recorded output shows that this runs in a process pool over the available CPUs.
# NOTE(review): the meaning of optim_over="cols" and scale_exp=False is defined
# by escoring -- check its documentation before changing them.
escores = calculate_escores(HVG, r_cells, S=S, optim_over="cols", scale_exp=False)
Start enrichment scoring using 160 CPUs
Creating process pool
Run enrichment scoring
Enrichment scoring complete

The `escores` dataframe has shape genes x r_cells (one row per gene, one column per reference cell). The order of the genes is preserved, so the rows can be mapped back to the gene indices in the original data. Keep in mind that indexing in Python starts at 0, not 1. If you need any help here, let me know. Below, I manually set the dataframe index to the gene names.

In [25]:
# Label the rows of escores with the gene IDs read from hvg_ids.txt.
# The index is assigned in place and relies on the gene order being preserved.
escores.index = hvg_names
In [26]:
# Confirm that the rows are now labelled with gene IDs.
escores.index
Out[26]:
Index(['AT1G05260', 'AT3G59370', 'AT2G36100', 'AT1G12080', 'AT1G12090',
       'AT4G11290', 'AT5G42180', 'AT5G66390', 'AT2G32300', 'AT2G02130',
       ...
       'AT4G06395', 'AT3G55440', 'AT3G03100', 'AT5G54760', 'AT2G33040',
       'AT2G42680', 'AT5G11770', 'AT5G08290', 'AT5G53300', 'AT5G64400'],
      dtype='object', length=17513)
In [27]:
# Build the permuted expression matrix; its scores are used further down to
# derive the significance cut-offs. This takes a little while.
seed = 42  # fixed seed so the permutation is reproducible
n = 100  # number of times the data is permuted
P = permute(HVG, n=n, seed=seed)
In [28]:
# Score the permuted matrix with exactly the same settings as the real data,
# so that pscores can serve as the reference for the significance cut-offs.
pscores = calculate_escores(P, r_cells, S=S, optim_over="cols", scale_exp=False)
Start enrichment scoring using 160 CPUs
Creating process pool
Run enrichment scoring
Enrichment scoring complete
In [30]:
# Determine the significance cut-offs from the permutation scores.
n_sds = 5  # the number of SDs away from the mean for significance
cutoffs = sig_interval(pscores, n_sds=n_sds)
In [31]:
# Check the container type of the scores before passing them to sig_dictionary.
type(escores)
Out[31]:
pandas.core.frame.DataFrame
In [32]:
# Get a dictionary of significant genes per reference cell:
# each key is a reference cell, each value the list of gene IDs that pass the cut-offs.
sigs = sig_dictionary(escores, cutoffs)
In [33]:
# Display the full dictionary of significant genes.
# NOTE(review): this prints every gene list (a very long output); consider
# showing only the number of genes per cell when sharing the notebook.
sigs
Out[33]:
{1958: ['AT5G62330',
  'AT1G54890',
  'AT3G60530',
  'AT1G57590',
  'AT5G51500',
  'AT5G06270',
  'AT1G26945',
  'AT1G53130',
  'AT3G20360',
  'AT1G68150',
  'AT3G62280',
  'AT2G20800',
  'AT1G06930',
  'AT3G09455',
  'AT5G51490',
  'AT2G04090',
  'AT1G02440',
  'AT3G54410',
  'AT4G22235',
  'AT3G27950',
  'AT3G60440',
  'AT2G37260',
  'AT1G48470',
  'AT4G27950',
  'AT4G12170',
  'AT2G09005',
  'AT1G74810',
  'AT1G67400',
  'AT3G25905',
  'AT2G23672',
  'AT3G49110',
  'AT1G06773',
  'AT1G68470'],
 19425: ['AT3G02230',
  'AT5G25830',
  'AT4G00480',
  'AT5G44820',
  'AT5G25190',
  'AT3G62120',
  'AT3G61270',
  'AT5G04820',
  'AT3G17780',
  'AT5G14150',
  'AT1G07645',
  'AT1G09580',
  'AT1G47640',
  'AT1G60787',
  'AT3G52760',
  'AT3G07680',
  'AT1G13510',
  'AT5G60370',
  'AT2G34920',
  'AT4G17010',
  'AT4G21720',
  'AT2G23310',
  'AT3G58860',
  'AT2G45070',
  'AT4G03490',
  'AT3G03555',
  'AT3G03290',
  'AT3G03160',
  'AT5G10780',
  'AT2G29120',
  'AT1G33100',
  'AT1G55440'],
 51665: ['AT5G11420',
  'AT1G12845',
  'AT5G07990',
  'AT5G18840',
  'AT1G09750',
  'AT4G30420',
  'AT1G15550',
  'AT5G22300',
  'AT1G78570',
  'AT2G48020',
  'AT1G20840',
  'AT5G02270',
  'AT1G70280',
  'AT1G63330',
  'AT5G02000',
  'AT1G64900',
  'AT3G61660',
  'AT5G17050',
  'AT3G59140',
  'AT1G64920',
  'AT5G56040',
  'AT3G20110',
  'AT1G10640',
  'AT3G23930',
  'AT1G62580',
  'AT3G28040',
  'AT3G58620',
  'AT4G05780',
  'AT3G28200',
  'AT1G04520',
  'AT2G46930',
  'AT3G09300',
  'AT1G55120',
  'AT1G05835',
  'AT3G53010',
  'AT1G76100',
  'AT1G76670',
  'AT5G05170',
  'AT4G30060',
  'AT5G22355',
  'AT1G60630',
  'AT1G75780',
  'AT5G49215',
  'AT1G08510',
  'AT1G14500',
  'AT1G21070',
  'AT1G08200',
  'AT5G66080',
  'AT3G12120',
  'AT2G03600',
  'AT1G78040',
  'AT3G61580',
  'AT5G16340'],
 13742: ['AT1G30760',
  'AT1G02460',
  'AT4G28410',
  'AT4G37295',
  'AT5G20720',
  'AT2G43660',
  'AT2G43780',
  'AT1G11330',
  'AT5G13780',
  'AT1G26630'],
 80517: ['AT4G21310',
  'AT5G47920',
  'AT5G62940',
  'AT1G54330',
  'AT2G03500',
  'AT2G02000',
  'AT2G32280',
  'AT1G02705',
  'AT2G16850',
  'AT5G11100',
  'AT4G13600',
  'AT3G28455',
  'AT2G39830',
  'AT5G02460',
  'AT1G69970',
  'AT5G48480',
  'AT5G62960',
  'AT5G02750',
  'AT5G13880',
  'AT2G32275',
  'AT3G17010',
  'AT1G04680',
  'AT1G26790',
  'AT2G37610',
  'AT2G19580',
  'AT5G44680',
  'AT5G24920',
  'AT4G11140',
  'AT2G01940',
  'AT4G37180',
  'AT2G22730',
  'AT4G21300',
  'AT5G37485',
  'AT3G06435',
  'AT5G41905',
  'AT1G49560',
  'AT2G28420',
  'AT5G40150',
  'AT1G76190',
  'AT1G70510',
  'AT5G09500',
  'AT5G50610',
  'AT2G28150',
  'AT5G40830',
  'AT2G23970',
  'AT2G19572',
  'AT1G11120',
  'AT2G30060',
  'AT5G09700',
  'AT5G38920'],
 109272: ['AT5G02640',
  'AT5G60490',
  'AT1G79620',
  'AT3G10080',
  'AT1G68810',
  'AT2G28410',
  'AT1G01900',
  'AT3G08490',
  'AT1G27380',
  'AT2G47670',
  'AT1G03820',
  'AT5G60650',
  'AT5G64530',
  'AT4G36160',
  'AT2G21050',
  'AT3G21270',
  'AT5G06670',
  'AT1G53163',
  'AT4G08685',
  'AT5G53710',
  'AT2G37210',
  'AT3G42800',
  'AT3G25710',
  'AT1G63520',
  'AT1G14440',
  'AT2G34060',
  'AT3G17600',
  'AT4G34950',
  'AT1G52830',
  'AT1G14190',
  'AT5G66300',
  'AT5G06710',
  'AT4G06195',
  'AT5G24510',
  'AT3G44730',
  'AT3G61380',
  'AT1G78440',
  'AT1G67510',
  'AT1G03010',
  'AT2G39870',
  'AT1G23350',
  'AT4G13560',
  'AT4G30020',
  'AT2G18060',
  'AT2G46300',
  'AT1G26570',
  'AT5G01740',
  'AT3G16490',
  'AT5G01890',
  'AT3G05180',
  'AT3G29360',
  'AT1G07380',
  'AT5G53830',
  'AT4G15802',
  'AT1G12663',
  'AT2G19780',
  'AT3G54340',
  'AT3G17660',
  'AT3G55890',
  'AT1G54200',
  'AT1G15760',
  'AT5G61480',
  'AT1G33440',
  'AT2G46572',
  'AT1G26610',
  'AT4G03010',
  'AT2G28420',
  'AT5G37950',
  'AT2G22670',
  'AT5G60142',
  'AT1G70270',
  'AT3G19260',
  'AT2G35110',
  'AT1G16400',
  'AT4G11950',
  'AT5G55507',
  'AT2G04850',
  'AT1G70490',
  'AT4G34490',
  'AT3G56660',
  'AT4G34420',
  'AT5G02630',
  'AT2G28490',
  'AT2G04845',
  'AT5G64667',
  'AT2G30060',
  'AT1G78300',
  'AT1G14750',
  'AT4G30662',
  'AT5G36290',
  'AT5G53810',
  'AT3G56800'],
 10279: ['AT3G50870',
  'AT5G28640',
  'AT3G56220',
  'AT1G54580',
  'AT5G22580',
  'AT3G54220',
  'AT2G16580',
  'AT5G45200',
  'AT1G46264',
  'AT4G17970',
  'AT1G65900',
  'AT3G61660',
  'AT2G37300',
  'AT2G29125',
  'AT5G50375',
  'AT3G13175',
  'AT2G05990',
  'AT5G52920',
  'AT3G16950',
  'AT3G25860',
  'AT5G10160',
  'AT1G34430',
  'AT5G46290',
  'AT2G02070',
  'AT1G52670',
  'AT2G43360',
  'AT2G22230',
  'AT5G13710',
  'AT5G05890',
  'AT5G13140',
  'AT4G20070',
  'AT1G24360',
  'AT3G48610',
  'AT2G31920',
  'AT1G01090',
  'AT2G26520',
  'AT5G15530',
  'AT3G50230',
  'AT3G13190',
  'AT5G39220',
  'AT4G27250',
  'AT1G61170',
  'AT5G05900',
  'AT1G30120',
  'AT1G70580',
  'AT5G12970',
  'AT3G02630',
  'AT4G33580',
  'AT4G00390',
  'AT4G16155',
  'AT3G05190',
  'AT5G35360',
  'AT4G27760',
  'AT1G11125',
  'AT2G30370',
  'AT1G66780',
  'AT4G34640'],
 105466: ['AT1G18250',
  'AT2G03830',
  'AT3G55550',
  'AT4G35720',
  'AT1G26870',
  'AT1G75580',
  'AT3G30350',
  'AT3G10780',
  'AT1G35230',
  'AT5G26670',
  'AT4G35030',
  'AT5G48360',
  'AT4G33145',
  'AT3G25130',
  'AT4G19980',
  'AT3G63450',
  'AT1G53860',
  'AT1G33790',
  'AT3G53190',
  'AT5G11160',
  'AT4G36240',
  'AT3G51290',
  'AT3G22760',
  'AT4G03292',
  'AT5G67200',
  'AT4G28950',
  'AT2G34325',
  'AT3G19500',
  'AT4G10640',
  'AT2G25130',
  'AT4G00200',
  'AT5G67180',
  'AT4G20940',
  'AT2G43800',
  'AT1G55200',
  'AT3G24320',
  'AT1G05950',
  'AT3G27473'],
 14951: ['AT5G66460', 'AT5G63650', 'AT1G33540', 'AT4G28220', 'AT2G32070'],
 10152: ['AT2G04025',
  'AT4G34970',
  'AT1G13620',
  'AT1G04610',
  'AT4G27520',
  'AT5G57420',
  'AT1G26680',
  'AT1G17400',
  'AT5G07550',
  'AT3G60650',
  'AT3G20840',
  'AT2G28870',
  'AT1G18265',
  'AT3G55550',
  'AT4G35720',
  'AT3G57670',
  'AT3G03620',
  'AT2G46850',
  'AT1G21350',
  'AT1G51340',
  'AT5G48130',
  'AT3G52440',
  'AT5G58580',
  'AT1G53070',
  'AT2G42540',
  'AT5G54770',
  'AT2G34020',
  'AT3G54260',
  'AT1G19115',
  'AT3G42670',
  'AT5G41750',
  'AT3G55180',
  'AT2G45740',
  'AT5G09960',
  'AT3G02210',
  'AT5G50335',
  'AT5G01670',
  'AT3G01690',
  'AT1G62290',
  'AT1G49475',
  'AT3G57160',
  'AT1G02770',
  'AT1G74440',
  'AT1G69540',
  'AT1G01570',
  'AT2G35950',
  'AT3G48770',
  'AT4G00490',
  'AT4G24710',
  'AT1G31750',
  'AT3G60670',
  'AT4G19520',
  'AT5G26570',
  'AT3G10200',
  'AT4G37940',
  'AT5G40645',
  'AT3G10940',
  'AT4G25020',
  'AT5G05350',
  'AT1G13530',
  'AT4G37410',
  'AT2G35550',
  'AT1G34180',
  'AT3G53840',
  'AT5G10820',
  'AT2G13610'],
 15197: ['AT2G35770',
  'AT5G05965',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT5G50300',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT3G52525',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G46493',
  'AT2G31390'],
 13826: ['AT2G35770',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G31390'],
 62204: ['AT2G35770',
  'AT5G05965',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT5G50300',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT3G52525',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G46493',
  'AT2G31390'],
 107618: ['AT2G35770',
  'AT5G05965',
  'AT1G14080',
  'AT1G51500',
  'AT5G50300',
  'AT4G30610',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT3G52525',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G31390'],
 24854: ['AT2G35770',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT5G50300',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G31390'],
 67812: ['AT2G35770',
  'AT5G05965',
  'AT1G14080',
  'AT1G51500',
  'AT4G00400',
  'AT5G50300',
  'AT4G30610',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT3G52525',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G46493',
  'AT2G31390'],
 32708: ['AT2G35770',
  'AT5G05965',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT5G50300',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT3G52525',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G31390'],
 61028: ['AT2G35770',
  'AT1G52680',
  'AT1G14080',
  'AT3G45320',
  'AT3G55930',
  'AT3G11110',
  'AT1G61900',
  'AT4G17720',
  'AT5G22540',
  'AT5G36880',
  'AT1G05805',
  'AT5G24610',
  'AT2G31390'],
 4573: ['AT2G35770',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT3G45320',
  'AT3G55930',
  'AT3G11110',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT5G22540',
  'AT5G36880',
  'AT1G05805',
  'AT5G24610',
  'AT2G31390'],
 16701: ['AT2G35770',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT3G45320',
  'AT3G55930',
  'AT5G66607',
  'AT3G11110',
  'AT4G00360',
  'AT1G61900',
  'AT4G17720',
  'AT4G30810',
  'AT3G52525',
  'AT5G22540',
  'AT2G22900',
  'AT5G36880',
  'AT1G05805',
  'AT4G01190',
  'AT2G46493',
  'AT2G31390'],
 7125: ['AT2G35770',
  'AT1G52680',
  'AT1G14080',
  'AT1G51500',
  'AT3G45320',
  'AT4G15270',
  'AT3G11110',
  'AT4G17720',
  'AT1G10700',
  'AT5G36880',
  'AT5G24610',
  'AT2G31390'],
 1792: ['AT1G26870',
  'AT5G26670',
  'AT4G35030',
  'AT5G48360',
  'AT3G25130',
  'AT1G33790',
  'AT4G36240',
  'AT3G51290',
  'AT4G28950',
  'AT2G34325',
  'AT3G19500',
  'AT2G25130',
  'AT4G00200',
  'AT4G20940',
  'AT2G43800',
  'AT1G55200',
  'AT3G27473'],
 11594: ['AT4G35720',
  'AT1G26870',
  'AT1G75580',
  'AT3G30350',
  'AT3G10780',
  'AT5G26670',
  'AT4G35030',
  'AT5G48360',
  'AT4G33145',
  'AT3G25130',
  'AT4G19980',
  'AT1G33790',
  'AT5G11160',
  'AT4G36240',
  'AT3G51290',
  'AT4G28950',
  'AT2G34325',
  'AT3G19500',
  'AT2G25130',
  'AT4G00200',
  'AT5G67180',
  'AT4G20940',
  'AT2G43800',
  'AT1G55200',
  'AT3G27473'],
 104554: ['AT1G18250',
  'AT2G03830',
  'AT3G55550',
  'AT4G35720',
  'AT1G26870',
  'AT1G75580',
  'AT3G30350',
  'AT3G10780',
  'AT1G35230',
  'AT5G26670',
  'AT4G35030',
  'AT5G48360',
  'AT4G33145',
  'AT3G25130',
  'AT4G19980',
  'AT1G33790',
  'AT5G11160',
  'AT4G36240',
  'AT3G51290',
  'AT4G03292',
  'AT5G67200',
  'AT4G28950',
  'AT2G34325',
  'AT3G19500',
  'AT4G10640',
  'AT2G25130',
  'AT4G00200',
  'AT5G67180',
  'AT4G20940',
  'AT2G43800',
  'AT1G55200',
  'AT3G27473'],
 84691: ['AT2G03830',
  'AT3G55550',
  'AT4G35720',
  'AT1G26870',
  'AT1G75580',
  'AT3G30350',
  'AT3G10780',
  'AT5G26670',
  'AT4G35030',
  'AT5G48360',
  'AT4G33145',
  'AT3G25130',
  'AT4G19980',
  'AT1G33790',
  'AT5G11160',
  'AT4G36240',
  'AT3G51290',
  'AT4G03292',
  'AT4G28950',
  'AT2G34325',
  'AT3G19500',
  'AT4G10640',
  'AT2G25130',
  'AT4G00200',
  'AT5G67180',
  'AT4G20940',
  'AT2G43800',
  'AT1G55200',
  'AT3G27473'],
 83807: ['AT1G05010',
  'AT1G22880',
  'AT1G71380',
  'AT1G02920',
  'AT2G43000',
  'AT1G21130',
  'AT4G15530',
  'AT5G10520',
  'AT4G29740',
  'AT1G36060',
  'AT3G14362',
  'AT2G28570',
  'AT1G69920',
  'AT2G19590',
  'AT1G53708',
  'AT1G29290',
  'AT3G46970',
  'AT5G54870',
  'AT1G23210',
  'AT3G10320',
  'AT5G25110',
  'AT1G17235',
  'AT3G60780',
  'AT5G45880',
  'AT1G02820',
  'AT5G45810',
  'AT5G53460',
  'AT4G39290',
  'AT1G69830',
  'AT1G32700',
  'AT4G20780',
  'AT5G62150',
  'AT3G29810',
  'AT1G25277',
  'AT4G11360',
  'AT2G44080',
  'AT3G08690',
  'AT5G57640',
  'AT3G59450',
  'AT4G23010',
  'AT2G42280',
  'AT4G37860',
  'AT5G22250',
  'AT3G12490',
  'AT2G35890',
  'AT5G46180',
  'AT3G57550',
  'AT2G25250',
  'AT3G46440',
  'AT3G48180',
  'AT1G32690',
  'AT2G45790',
  'AT1G53400',
  'AT2G21620',
  'AT4G11850',
  'AT2G16500',
  'AT4G04800',
  'AT5G19440',
  'AT5G40370',
  'AT2G29490',
  'AT5G02500',
  'AT3G48990',
  'AT3G58750',
  'AT4G05390',
  'AT3G54620',
  'AT3G06860',
  'AT3G02470',
  'AT3G44110',
  'AT3G62290',
  'AT5G64400'],
 81680: ['AT1G05010',
  'AT1G22880',
  'AT1G02920',
  'AT2G43000',
  'AT4G15530',
  'AT4G29740',
  'AT3G14362',
  'AT1G69920',
  'AT1G29290',
  'AT3G46970',
  'AT5G23830',
  'AT1G23210',
  'AT3G10320',
  'AT1G17235',
  'AT5G44572',
  'AT3G60780',
  'AT5G45880',
  'AT3G52440',
  'AT5G45810',
  'AT5G53460',
  'AT4G39290',
  'AT4G15550',
  'AT4G20780',
  'AT1G25277',
  'AT4G11360',
  'AT2G44080',
  'AT3G08690',
  'AT5G39020',
  'AT5G57640',
  'AT4G23010',
  'AT2G42280',
  'AT5G22250',
  'AT3G12490',
  'AT5G46180',
  'AT1G17240',
  'AT3G57550',
  'AT2G25250',
  'AT3G46440',
  'AT1G32690',
  'AT2G45790',
  'AT4G16250',
  'AT4G11850',
  'AT2G16500',
  'AT5G19440',
  'AT5G40370',
  'AT1G54100',
  'AT5G03630',
  'AT2G29490',
  'AT2G41835',
  'AT3G48990',
  'AT3G58750',
  'AT4G05390',
  'AT3G06860',
  'AT3G02470',
  'AT3G20770',
  'AT3G44110',
  'AT3G62290',
  'AT5G64400'],
 74134: ['AT1G05010',
  'AT1G22880',
  'AT1G02920',
  'AT2G43000',
  'AT1G21130',
  'AT4G15530',
  'AT4G29740',
  'AT3G14362',
  'AT2G28570',
  'AT1G69920',
  'AT2G19590',
  'AT1G53708',
  'AT1G29290',
  'AT3G46970',
  'AT5G23830',
  'AT1G23210',
  'AT3G10320',
  'AT1G17235',
  'AT3G60780',
  'AT5G45880',
  'AT1G02820',
  'AT5G45810',
  'AT5G53460',
  'AT4G39290',
  'AT1G69830',
  'AT4G20780',
  'AT1G25277',
  'AT4G11360',
  'AT2G44080',
  'AT3G08690',
  'AT5G57640',
  'AT4G23010',
  'AT2G42280',
  'AT5G22250',
  'AT3G12490',
  'AT2G35890',
  'AT5G46180',
  'AT3G57550',
  'AT2G25250',
  'AT3G46440',
  'AT1G32690',
  'AT2G45790',
  'AT4G16250',
  'AT4G11850',
  'AT2G16500',
  'AT5G19440',
  'AT5G40370',
  'AT1G54100',
  'AT2G29490',
  'AT5G02500',
  'AT3G48990',
  'AT3G58750',
  'AT4G05390',
  'AT3G54620',
  'AT3G06860',
  'AT3G02470',
  'AT3G44110',
  'AT3G62290',
  'AT5G64400'],
 89957: ['AT1G05010',
  'AT1G22880',
  'AT1G71380',
  'AT1G02920',
  'AT2G43000',
  'AT2G26560',
  'AT1G21130',
  'AT1G02930',
  'AT4G15530',
  'AT1G36060',
  'AT5G27420',
  'AT3G14362',
  'AT2G28570',
  'AT3G18250',
  'AT1G69920',
  'AT1G75000',
  'AT1G29290',
  'AT5G48175',
  'AT2G35930',
  'AT4G11910',
  'AT5G54870',
  'AT1G23210',
  'AT3G10320',
  'AT1G17235',
  'AT3G60780',
  'AT5G03380',
  'AT1G02820',
  'AT3G26080',
  'AT5G53460',
  'AT4G39290',
  'AT4G20780',
  'AT5G62150',
  'AT1G25277',
  'AT4G11360',
  'AT2G44080',
  'AT3G08690',
  'AT5G39020',
  'AT5G57640',
  'AT4G23010',
  'AT2G42280',
  'AT1G32940',
  'AT4G37860',
  'AT5G22250',
  'AT3G12490',
  'AT3G55470',
  'AT4G11370',
  'AT2G35890',
  'AT1G52880',
  'AT2G25250',
  'AT4G35380',
  'AT3G48180',
  'AT1G02880',
  'AT1G77890',
  'AT2G46330',
  'AT2G45790',
  'AT4G30210',
  'AT4G11850',
  'AT2G16500',
  'AT5G19440',
  'AT5G40370',
  'AT2G29490',
  'AT3G48990',
  'AT3G58750',
  'AT4G05390',
  'AT3G54620',
  'AT3G02470',
  'AT3G44110',
  'AT3G62290',
  'AT5G64400'],
 55332: ['AT1G22880',
  'AT4G15530',
  'AT4G26260',
  'AT4G29740',
  'AT3G14362',
  'AT1G69920',
  'AT2G19590',
  'AT1G53708',
  'AT1G29290',
  'AT3G46970',
  'AT5G23830',
  'AT1G23210',
  'AT3G10320',
  'AT5G25110',
  'AT1G17235',
  'AT3G60780',
  'AT5G45880',
  'AT3G52440',
  'AT5G45810',
  'AT5G53460',
  'AT4G39290',
  'AT1G69830',
  'AT4G15550',
  'AT3G59435',
  'AT5G48300',
  'AT1G25277',
  'AT4G11360',
  'AT3G08690',
  'AT5G57640',
  'AT4G23010',
  'AT2G42280',
  'AT3G12490',
  'AT2G35890',
  'AT5G46180',
  'AT1G17240',
  'AT1G60960',
  'AT3G57550',
  'AT2G25250',
  'AT3G46440',
  'AT1G32690',
  'AT2G45790',
  'AT1G53400',
  'AT1G77610',
  'AT4G16250',
  'AT4G11850',
  'AT2G16500',
  'AT5G19440',
  'AT5G40370',
  'AT1G54100',
  'AT2G29490',
  'AT3G58750',
  'AT2G39770',
  'AT3G06860',
  'AT3G02470',
  'AT3G20770',
  'AT3G44110',
  'AT5G64400']}
In [35]:
# One column per reference cell holding its significant genes. Wrapping each
# list in a Series lets pandas pad the shorter columns with NaN, because the
# gene lists have different lengths.
sig_columns = {cell_id: pd.Series(gene_ids) for cell_id, gene_ids in sigs.items()}
df = pd.DataFrame(sig_columns)
df.to_csv("./supp_data/escoring.gene.nsds5.csv", encoding='utf-8', index=False)
In [36]:
# Export the full enrichment-score table (rows = genes, columns = reference cells).
# The index holds the gene IDs assigned above, so it must be written out;
# with index=False the rows of the CSV could no longer be matched to genes.
escores.to_csv("./supp_data/escoring.gene.csv", encoding='utf-8',
               index=True, index_label="gene")

4. Visualize the top 3 most enriched genes for each reference cell

In [34]:
# Gene ID -> column index in HVG, built once instead of searching the list for
# every plotted gene. setdefault keeps the first occurrence, matching list.index().
gene_to_col = {}
for col_idx, gene_id in enumerate(hvg_names):
    gene_to_col.setdefault(gene_id, col_idx)

for cell in r_cells:
    # The three genes with the highest enrichment score for this reference cell.
    top = escores.loc[:, cell].sort_values(ascending=False).index[0:3]

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))
    fig.subplots_adjust(wspace=0.3, hspace=0.5)

    # Panel 1: all cells in grey with the reference cell highlighted in orange.
    ax1.scatter(UMAP[:, 0], UMAP[:, 1], s=2, color="lightgrey")
    ax1.scatter(UMAP[int(cell), 0], UMAP[int(cell), 1],
                s=2, color="orange")
    ax1.set_title(cell)

    # Panels 2-4: expression of each top gene across all cells.
    for t, ax in zip(top, [ax2, ax3, ax4]):
        ti = gene_to_col[t]
        # Densify the column: `.data` of a sparse column contains only the
        # explicitly stored entries, so its length (or order) need not match
        # the rows of UMAP. toarray() always yields one value per cell.
        expression = HVG[:, ti].toarray().ravel()
        ax.scatter(UMAP[:, 0], UMAP[:, 1], s=2,
                   c=expression, cmap="Oranges")
        ax.set_title(t)
    plt.show()
    # Release the figure; otherwise every figure of the loop stays in memory.
    plt.close(fig)
    print("\n")






























In [ ]: